home *** CD-ROM | disk | FTP | other *** search
/ Amiga Plus 2004 #11 / Amiga Plus CD - 2004 - No. 11.iso / AmiSoft / Comm / www / tidy_os4.lha / tidy / src / tags.c < prev    next >
C/C++ Source or Header  |  2004-07-25  |  46KB  |  894 lines

  1. /* tags.c -- recognize HTML tags
  2.  
  3.   (c) 1998-2004 (W3C) MIT, ERCIM, Keio University
  4.   See tidy.h for the copyright notice.
  5.  
  6.   CVS Info :
  7.  
  8.     $Author: hoehrmann $ 
  9.     $Date: 2004/06/18 21:10:32 $ 
  10.     $Revision: 1.48 $ 
  11.  
  12.   The HTML tags are stored as 8 bit ASCII strings.
  13.  
  14. */
  15.  
  16. #include "tags.h"
  17. #include "tidy-int.h"
  18. #include "message.h"
  19. #include "tmbstr.h"
  20.  
  /* Per-element version masks.
  **
  ** Each VERS_ELEM_<NAME> ORs together one flag per column, in the fixed
  ** column order HT20, HT32, H40T, H41T, X10T, H40F, H41F, X10F, H40S,
  ** H41S, X10S, XH11, XB10.  A column holding `xxxx` leaves that flag out
  ** (presumably `xxxx` is defined as 0 elsewhere -- confirm in the header
  ** that declares these flags).  Each mask is used as the version
  ** initializer of the matching W3C entry in tag_defs[].
  */
  21. #define VERS_ELEM_A          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  22. #define VERS_ELEM_ABBR       (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  23. #define VERS_ELEM_ACRONYM    (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  24. #define VERS_ELEM_ADDRESS    (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  25. #define VERS_ELEM_APPLET     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  26. #define VERS_ELEM_AREA       (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  27. #define VERS_ELEM_B          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  28. #define VERS_ELEM_BASE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  29. #define VERS_ELEM_BASEFONT   (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  30. #define VERS_ELEM_BDO        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  31. #define VERS_ELEM_BIG        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  32. #define VERS_ELEM_BLOCKQUOTE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  33. #define VERS_ELEM_BODY       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  34. #define VERS_ELEM_BR         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  35. #define VERS_ELEM_BUTTON     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  36. #define VERS_ELEM_CAPTION    (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  37. #define VERS_ELEM_CENTER     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  38. #define VERS_ELEM_CITE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  39. #define VERS_ELEM_CODE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  40. #define VERS_ELEM_COL        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  41. #define VERS_ELEM_COLGROUP   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  42. #define VERS_ELEM_DD         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  43. #define VERS_ELEM_DEL        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  44. #define VERS_ELEM_DFN        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  45. #define VERS_ELEM_DIR        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  46. #define VERS_ELEM_DIV        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  47. #define VERS_ELEM_DL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  48. #define VERS_ELEM_DT         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  49. #define VERS_ELEM_EM         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  50. #define VERS_ELEM_FIELDSET   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  51. #define VERS_ELEM_FONT       (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  52. #define VERS_ELEM_FORM       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  53. #define VERS_ELEM_FRAME      (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  54. #define VERS_ELEM_FRAMESET   (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  55. #define VERS_ELEM_H1         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  56. #define VERS_ELEM_H2         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  57. #define VERS_ELEM_H3         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  58. #define VERS_ELEM_H4         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  59. #define VERS_ELEM_H5         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  60. #define VERS_ELEM_H6         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  61. #define VERS_ELEM_HEAD       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  62. #define VERS_ELEM_HR         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  63. #define VERS_ELEM_HTML       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  64. #define VERS_ELEM_I          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  65. #define VERS_ELEM_IFRAME     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  66. #define VERS_ELEM_IMG        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  67. #define VERS_ELEM_INPUT      (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  68. #define VERS_ELEM_INS        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  69. #define VERS_ELEM_ISINDEX    (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  70. #define VERS_ELEM_KBD        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  71. #define VERS_ELEM_LABEL      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  72. #define VERS_ELEM_LEGEND     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  73. #define VERS_ELEM_LI         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  74. #define VERS_ELEM_LINK       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  75. #define VERS_ELEM_LISTING    (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
  76. #define VERS_ELEM_MAP        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  77. #define VERS_ELEM_MENU       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  78. #define VERS_ELEM_META       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  79. #define VERS_ELEM_NEXTID     (HT20|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
  80. #define VERS_ELEM_NOFRAMES   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  81. #define VERS_ELEM_NOSCRIPT   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  82. #define VERS_ELEM_OBJECT     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  83. #define VERS_ELEM_OL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  84. #define VERS_ELEM_OPTGROUP   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  85. #define VERS_ELEM_OPTION     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  86. #define VERS_ELEM_P          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  87. #define VERS_ELEM_PARAM      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  88. #define VERS_ELEM_PLAINTEXT  (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
  89. #define VERS_ELEM_PRE        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  90. #define VERS_ELEM_Q          (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  91. #define VERS_ELEM_RB         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
  92. #define VERS_ELEM_RBC        (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
  93. #define VERS_ELEM_RP         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
  94. #define VERS_ELEM_RT         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
  95. #define VERS_ELEM_RTC        (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
  96. #define VERS_ELEM_RUBY       (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
  97. #define VERS_ELEM_S          (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  98. #define VERS_ELEM_SAMP       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  99. #define VERS_ELEM_SCRIPT     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  100. #define VERS_ELEM_SELECT     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  101. #define VERS_ELEM_SMALL      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  102. #define VERS_ELEM_SPAN       (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  103. #define VERS_ELEM_STRIKE     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  104. #define VERS_ELEM_STRONG     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  105. #define VERS_ELEM_STYLE      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  106. #define VERS_ELEM_SUB        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  107. #define VERS_ELEM_SUP        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  108. #define VERS_ELEM_TABLE      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  109. #define VERS_ELEM_TBODY      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  110. #define VERS_ELEM_TD         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  111. #define VERS_ELEM_TEXTAREA   (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  112. #define VERS_ELEM_TFOOT      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  113. #define VERS_ELEM_TH         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  114. #define VERS_ELEM_THEAD      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  115. #define VERS_ELEM_TITLE      (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  116. #define VERS_ELEM_TR         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  117. #define VERS_ELEM_TT         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
  118. #define VERS_ELEM_U          (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
  119. #define VERS_ELEM_UL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  120. #define VERS_ELEM_VAR        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
  121. #define VERS_ELEM_XMP        (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
  122.  
  /* Static table of the element definitions known to this file.
  **
  ** Each initializer supplies, in order: the TidyTagId, the element name,
  ** a version mask (VERS_*), a pointer to the element's W3CAttrsFor_* table
  ** (NULL for the unknown and proprietary entries), content-model flags
  ** (CM_*), a Parse* function and a Check* function (NULL where none is
  ** listed).  NOTE(review): the mapping of these positions onto the Dict
  ** fields id/name/versions/attrvers/model/parser/chkattrs is assumed from
  ** the field names copied in install(); confirm against the declaration
  ** of Dict.
  **
  ** The first entry is the TidyTag_UNKNOWN placeholder; lookup() starts its
  ** linear scan at tag_defs + 1, so that entry is never matched by name
  ** there.  The last entry has a NULL name and must remain last.
  */
  123. static const Dict tag_defs[] =
  124. {
  125.   { TidyTag_UNKNOWN,    "unknown!",   VERS_UNKNOWN,         NULL,                       (0),                                           NULL,          NULL           },
  126.  
  127.   /* W3C defined elements */
  128.   { TidyTag_A,          "a",          VERS_ELEM_A,          &W3CAttrsFor_A[0],          (CM_INLINE),                                   ParseInline,   NULL           },
  129.   { TidyTag_ABBR,       "abbr",       VERS_ELEM_ABBR,       &W3CAttrsFor_ABBR[0],       (CM_INLINE),                                   ParseInline,   NULL           },
  130.   { TidyTag_ACRONYM,    "acronym",    VERS_ELEM_ACRONYM,    &W3CAttrsFor_ACRONYM[0],    (CM_INLINE),                                   ParseInline,   NULL           },
  131.   { TidyTag_ADDRESS,    "address",    VERS_ELEM_ADDRESS,    &W3CAttrsFor_ADDRESS[0],    (CM_BLOCK),                                    ParseBlock,    NULL           },
  132.   { TidyTag_APPLET,     "applet",     VERS_ELEM_APPLET,     &W3CAttrsFor_APPLET[0],     (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM),         ParseBlock,    NULL           },
  133.   { TidyTag_AREA,       "area",       VERS_ELEM_AREA,       &W3CAttrsFor_AREA[0],       (CM_BLOCK|CM_EMPTY),                           ParseEmpty,    CheckAREA      },
  134.   { TidyTag_B,          "b",          VERS_ELEM_B,          &W3CAttrsFor_B[0],          (CM_INLINE),                                   ParseInline,   NULL           },
  135.   { TidyTag_BASE,       "base",       VERS_ELEM_BASE,       &W3CAttrsFor_BASE[0],       (CM_HEAD|CM_EMPTY),                            ParseEmpty,    NULL           },
  136.   { TidyTag_BASEFONT,   "basefont",   VERS_ELEM_BASEFONT,   &W3CAttrsFor_BASEFONT[0],   (CM_INLINE|CM_EMPTY),                          ParseEmpty,    NULL           },
  137.   { TidyTag_BDO,        "bdo",        VERS_ELEM_BDO,        &W3CAttrsFor_BDO[0],        (CM_INLINE),                                   ParseInline,   NULL           },
  138.   { TidyTag_BIG,        "big",        VERS_ELEM_BIG,        &W3CAttrsFor_BIG[0],        (CM_INLINE),                                   ParseInline,   NULL           },
  139.   { TidyTag_BLOCKQUOTE, "blockquote", VERS_ELEM_BLOCKQUOTE, &W3CAttrsFor_BLOCKQUOTE[0], (CM_BLOCK),                                    ParseBlock,    NULL           },
  140.   { TidyTag_BODY,       "body",       VERS_ELEM_BODY,       &W3CAttrsFor_BODY[0],       (CM_HTML|CM_OPT|CM_OMITST),                    ParseBody,     NULL           },
  141.   { TidyTag_BR,         "br",         VERS_ELEM_BR,         &W3CAttrsFor_BR[0],         (CM_INLINE|CM_EMPTY),                          ParseEmpty,    NULL           },
  142.   { TidyTag_BUTTON,     "button",     VERS_ELEM_BUTTON,     &W3CAttrsFor_BUTTON[0],     (CM_INLINE),                                   ParseInline,   NULL           },
  143.   { TidyTag_CAPTION,    "caption",    VERS_ELEM_CAPTION,    &W3CAttrsFor_CAPTION[0],    (CM_TABLE),                                    ParseInline,   CheckCaption   },
  144.   { TidyTag_CENTER,     "center",     VERS_ELEM_CENTER,     &W3CAttrsFor_CENTER[0],     (CM_BLOCK),                                    ParseBlock,    NULL           },
  145.   { TidyTag_CITE,       "cite",       VERS_ELEM_CITE,       &W3CAttrsFor_CITE[0],       (CM_INLINE),                                   ParseInline,   NULL           },
  146.   { TidyTag_CODE,       "code",       VERS_ELEM_CODE,       &W3CAttrsFor_CODE[0],       (CM_INLINE),                                   ParseInline,   NULL           },
  147.   { TidyTag_COL,        "col",        VERS_ELEM_COL,        &W3CAttrsFor_COL[0],        (CM_TABLE|CM_EMPTY),                           ParseEmpty,    NULL           },
  148.   { TidyTag_COLGROUP,   "colgroup",   VERS_ELEM_COLGROUP,   &W3CAttrsFor_COLGROUP[0],   (CM_TABLE|CM_OPT),                             ParseColGroup, NULL           },
  149.   { TidyTag_DD,         "dd",         VERS_ELEM_DD,         &W3CAttrsFor_DD[0],         (CM_DEFLIST|CM_OPT|CM_NO_INDENT),              ParseBlock,    NULL           },
  150.   { TidyTag_DEL,        "del",        VERS_ELEM_DEL,        &W3CAttrsFor_DEL[0],        (CM_INLINE|CM_BLOCK|CM_MIXED),                 ParseInline,   NULL           },
  151.   { TidyTag_DFN,        "dfn",        VERS_ELEM_DFN,        &W3CAttrsFor_DFN[0],        (CM_INLINE),                                   ParseInline,   NULL           },
  152.   { TidyTag_DIR,        "dir",        VERS_ELEM_DIR,        &W3CAttrsFor_DIR[0],        (CM_BLOCK|CM_OBSOLETE),                        ParseList,     NULL           },
  153.   { TidyTag_DIV,        "div",        VERS_ELEM_DIV,        &W3CAttrsFor_DIV[0],        (CM_BLOCK),                                    ParseBlock,    NULL           },
  154.   { TidyTag_DL,         "dl",         VERS_ELEM_DL,         &W3CAttrsFor_DL[0],         (CM_BLOCK),                                    ParseDefList,  NULL           },
  155.   { TidyTag_DT,         "dt",         VERS_ELEM_DT,         &W3CAttrsFor_DT[0],         (CM_DEFLIST|CM_OPT|CM_NO_INDENT),              ParseInline,   NULL           },
  156.   { TidyTag_EM,         "em",         VERS_ELEM_EM,         &W3CAttrsFor_EM[0],         (CM_INLINE),                                   ParseInline,   NULL           },
  157.   { TidyTag_FIELDSET,   "fieldset",   VERS_ELEM_FIELDSET,   &W3CAttrsFor_FIELDSET[0],   (CM_BLOCK),                                    ParseBlock,    NULL           },
  158.   { TidyTag_FONT,       "font",       VERS_ELEM_FONT,       &W3CAttrsFor_FONT[0],       (CM_INLINE),                                   ParseInline,   NULL           },
  159.   { TidyTag_FORM,       "form",       VERS_ELEM_FORM,       &W3CAttrsFor_FORM[0],       (CM_BLOCK),                                    ParseBlock,    CheckFORM      },
  160.   { TidyTag_FRAME,      "frame",      VERS_ELEM_FRAME,      &W3CAttrsFor_FRAME[0],      (CM_FRAMES|CM_EMPTY),                          ParseEmpty,    NULL           },
  161.   { TidyTag_FRAMESET,   "frameset",   VERS_ELEM_FRAMESET,   &W3CAttrsFor_FRAMESET[0],   (CM_HTML|CM_FRAMES),                           ParseFrameSet, NULL           },
  162.   { TidyTag_H1,         "h1",         VERS_ELEM_H1,         &W3CAttrsFor_H1[0],         (CM_BLOCK|CM_HEADING),                         ParseInline,   NULL           },
  163.   { TidyTag_H2,         "h2",         VERS_ELEM_H2,         &W3CAttrsFor_H2[0],         (CM_BLOCK|CM_HEADING),                         ParseInline,   NULL           },
  164.   { TidyTag_H3,         "h3",         VERS_ELEM_H3,         &W3CAttrsFor_H3[0],         (CM_BLOCK|CM_HEADING),                         ParseInline,   NULL           },
  165.   { TidyTag_H4,         "h4",         VERS_ELEM_H4,         &W3CAttrsFor_H4[0],         (CM_BLOCK|CM_HEADING),                         ParseInline,   NULL           },
  166.   { TidyTag_H5,         "h5",         VERS_ELEM_H5,         &W3CAttrsFor_H5[0],         (CM_BLOCK|CM_HEADING),                         ParseInline,   NULL           },
  167.   { TidyTag_H6,         "h6",         VERS_ELEM_H6,         &W3CAttrsFor_H6[0],         (CM_BLOCK|CM_HEADING),                         ParseInline,   NULL           },
  168.   { TidyTag_HEAD,       "head",       VERS_ELEM_HEAD,       &W3CAttrsFor_HEAD[0],       (CM_HTML|CM_OPT|CM_OMITST),                    ParseHead,     NULL           },
  169.   { TidyTag_HR,         "hr",         VERS_ELEM_HR,         &W3CAttrsFor_HR[0],         (CM_BLOCK|CM_EMPTY),                           ParseEmpty,    NULL           },
  170.   { TidyTag_HTML,       "html",       VERS_ELEM_HTML,       &W3CAttrsFor_HTML[0],       (CM_HTML|CM_OPT|CM_OMITST),                    ParseHTML,     CheckHTML      },
  171.   { TidyTag_I,          "i",          VERS_ELEM_I,          &W3CAttrsFor_I[0],          (CM_INLINE),                                   ParseInline,   NULL           },
  172.   { TidyTag_IFRAME,     "iframe",     VERS_ELEM_IFRAME,     &W3CAttrsFor_IFRAME[0],     (CM_INLINE),                                   ParseBlock,    NULL           },
  173.   { TidyTag_IMG,        "img",        VERS_ELEM_IMG,        &W3CAttrsFor_IMG[0],        (CM_INLINE|CM_IMG|CM_EMPTY),                   ParseEmpty,    CheckIMG       },
  174.   { TidyTag_INPUT,      "input",      VERS_ELEM_INPUT,      &W3CAttrsFor_INPUT[0],      (CM_INLINE|CM_IMG|CM_EMPTY),                   ParseEmpty,    NULL           },
  175.   { TidyTag_INS,        "ins",        VERS_ELEM_INS,        &W3CAttrsFor_INS[0],        (CM_INLINE|CM_BLOCK|CM_MIXED),                 ParseInline,   NULL           },
  176.   { TidyTag_ISINDEX,    "isindex",    VERS_ELEM_ISINDEX,    &W3CAttrsFor_ISINDEX[0],    (CM_BLOCK|CM_EMPTY),                           ParseEmpty,    NULL           },
  177.   { TidyTag_KBD,        "kbd",        VERS_ELEM_KBD,        &W3CAttrsFor_KBD[0],        (CM_INLINE),                                   ParseInline,   NULL           },
  178.   { TidyTag_LABEL,      "label",      VERS_ELEM_LABEL,      &W3CAttrsFor_LABEL[0],      (CM_INLINE),                                   ParseInline,   NULL           },
  179.   { TidyTag_LEGEND,     "legend",     VERS_ELEM_LEGEND,     &W3CAttrsFor_LEGEND[0],     (CM_INLINE),                                   ParseInline,   NULL           },
  180.   { TidyTag_LI,         "li",         VERS_ELEM_LI,         &W3CAttrsFor_LI[0],         (CM_LIST|CM_OPT|CM_NO_INDENT),                 ParseBlock,    NULL           },
  181.   { TidyTag_LINK,       "link",       VERS_ELEM_LINK,       &W3CAttrsFor_LINK[0],       (CM_HEAD|CM_EMPTY),                            ParseEmpty,    CheckLINK      },
  182.   { TidyTag_LISTING,    "listing",    VERS_ELEM_LISTING,    &W3CAttrsFor_LISTING[0],    (CM_BLOCK|CM_OBSOLETE),                        ParsePre,      NULL           },
  183.   { TidyTag_MAP,        "map",        VERS_ELEM_MAP,        &W3CAttrsFor_MAP[0],        (CM_INLINE),                                   ParseBlock,    NULL           },
  184.   { TidyTag_MENU,       "menu",       VERS_ELEM_MENU,       &W3CAttrsFor_MENU[0],       (CM_BLOCK|CM_OBSOLETE),                        ParseList,     NULL           },
  185.   { TidyTag_META,       "meta",       VERS_ELEM_META,       &W3CAttrsFor_META[0],       (CM_HEAD|CM_EMPTY),                            ParseEmpty,    CheckMETA      },
  186.   { TidyTag_NOFRAMES,   "noframes",   VERS_ELEM_NOFRAMES,   &W3CAttrsFor_NOFRAMES[0],   (CM_BLOCK|CM_FRAMES),                          ParseNoFrames, NULL           },
  187.   { TidyTag_NOSCRIPT,   "noscript",   VERS_ELEM_NOSCRIPT,   &W3CAttrsFor_NOSCRIPT[0],   (CM_BLOCK|CM_INLINE|CM_MIXED),                 ParseBlock,    NULL           },
  188.   { TidyTag_OBJECT,     "object",     VERS_ELEM_OBJECT,     &W3CAttrsFor_OBJECT[0],     (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), ParseBlock,    NULL           },
  189.   { TidyTag_OL,         "ol",         VERS_ELEM_OL,         &W3CAttrsFor_OL[0],         (CM_BLOCK),                                    ParseList,     NULL           },
  190.   { TidyTag_OPTGROUP,   "optgroup",   VERS_ELEM_OPTGROUP,   &W3CAttrsFor_OPTGROUP[0],   (CM_FIELD|CM_OPT),                             ParseOptGroup, NULL           },
  191.   { TidyTag_OPTION,     "option",     VERS_ELEM_OPTION,     &W3CAttrsFor_OPTION[0],     (CM_FIELD|CM_OPT),                             ParseText,     NULL           },
  192.   { TidyTag_P,          "p",          VERS_ELEM_P,          &W3CAttrsFor_P[0],          (CM_BLOCK|CM_OPT),                             ParseInline,   NULL           },
  193.   { TidyTag_PARAM,      "param",      VERS_ELEM_PARAM,      &W3CAttrsFor_PARAM[0],      (CM_INLINE|CM_EMPTY),                          ParseEmpty,    NULL           },
  194.   { TidyTag_PLAINTEXT,  "plaintext",  VERS_ELEM_PLAINTEXT,  &W3CAttrsFor_PLAINTEXT[0],  (CM_BLOCK|CM_OBSOLETE),                        ParsePre,      NULL           },
  195.   { TidyTag_PRE,        "pre",        VERS_ELEM_PRE,        &W3CAttrsFor_PRE[0],        (CM_BLOCK),                                    ParsePre,      NULL           },
  196.   { TidyTag_Q,          "q",          VERS_ELEM_Q,          &W3CAttrsFor_Q[0],          (CM_INLINE),                                   ParseInline,   NULL           },
  197.   { TidyTag_RB,         "rb",         VERS_ELEM_RB,         &W3CAttrsFor_RB[0],         (CM_INLINE),                                   ParseInline,   NULL           },
  198.   { TidyTag_RBC,        "rbc",        VERS_ELEM_RBC,        &W3CAttrsFor_RBC[0],        (CM_INLINE),                                   ParseInline,   NULL           },
  199.   { TidyTag_RP,         "rp",         VERS_ELEM_RP,         &W3CAttrsFor_RP[0],         (CM_INLINE),                                   ParseInline,   NULL           },
  200.   { TidyTag_RT,         "rt",         VERS_ELEM_RT,         &W3CAttrsFor_RT[0],         (CM_INLINE),                                   ParseInline,   NULL           },
  201.   { TidyTag_RTC,        "rtc",        VERS_ELEM_RTC,        &W3CAttrsFor_RTC[0],        (CM_INLINE),                                   ParseInline,   NULL           },
  202.   { TidyTag_RUBY,       "ruby",       VERS_ELEM_RUBY,       &W3CAttrsFor_RUBY[0],       (CM_INLINE),                                   ParseInline,   NULL           },
  203.   { TidyTag_S,          "s",          VERS_ELEM_S,          &W3CAttrsFor_S[0],          (CM_INLINE),                                   ParseInline,   NULL           },
  204.   { TidyTag_SAMP,       "samp",       VERS_ELEM_SAMP,       &W3CAttrsFor_SAMP[0],       (CM_INLINE),                                   ParseInline,   NULL           },
  205.   { TidyTag_SCRIPT,     "script",     VERS_ELEM_SCRIPT,     &W3CAttrsFor_SCRIPT[0],     (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE),         ParseScript,   CheckSCRIPT    },
  206.   { TidyTag_SELECT,     "select",     VERS_ELEM_SELECT,     &W3CAttrsFor_SELECT[0],     (CM_INLINE|CM_FIELD),                          ParseSelect,   NULL           },
  207.   { TidyTag_SMALL,      "small",      VERS_ELEM_SMALL,      &W3CAttrsFor_SMALL[0],      (CM_INLINE),                                   ParseInline,   NULL           },
  208.   { TidyTag_SPAN,       "span",       VERS_ELEM_SPAN,       &W3CAttrsFor_SPAN[0],       (CM_INLINE),                                   ParseInline,   NULL           },
  209.   { TidyTag_STRIKE,     "strike",     VERS_ELEM_STRIKE,     &W3CAttrsFor_STRIKE[0],     (CM_INLINE),                                   ParseInline,   NULL           },
  210.   { TidyTag_STRONG,     "strong",     VERS_ELEM_STRONG,     &W3CAttrsFor_STRONG[0],     (CM_INLINE),                                   ParseInline,   NULL           },
  211.   { TidyTag_STYLE,      "style",      VERS_ELEM_STYLE,      &W3CAttrsFor_STYLE[0],      (CM_HEAD),                                     ParseScript,   CheckSTYLE     },
  212.   { TidyTag_SUB,        "sub",        VERS_ELEM_SUB,        &W3CAttrsFor_SUB[0],        (CM_INLINE),                                   ParseInline,   NULL           },
  213.   { TidyTag_SUP,        "sup",        VERS_ELEM_SUP,        &W3CAttrsFor_SUP[0],        (CM_INLINE),                                   ParseInline,   NULL           },
  214.   { TidyTag_TABLE,      "table",      VERS_ELEM_TABLE,      &W3CAttrsFor_TABLE[0],      (CM_BLOCK),                                    ParseTableTag, CheckTABLE     },
  215.   { TidyTag_TBODY,      "tbody",      VERS_ELEM_TBODY,      &W3CAttrsFor_TBODY[0],      (CM_TABLE|CM_ROWGRP|CM_OPT),                   ParseRowGroup, NULL           },
  216.   { TidyTag_TD,         "td",         VERS_ELEM_TD,         &W3CAttrsFor_TD[0],         (CM_ROW|CM_OPT|CM_NO_INDENT),                  ParseBlock,    NULL           },
  217.   { TidyTag_TEXTAREA,   "textarea",   VERS_ELEM_TEXTAREA,   &W3CAttrsFor_TEXTAREA[0],   (CM_INLINE|CM_FIELD),                          ParseText,     NULL           },
  218.   { TidyTag_TFOOT,      "tfoot",      VERS_ELEM_TFOOT,      &W3CAttrsFor_TFOOT[0],      (CM_TABLE|CM_ROWGRP|CM_OPT),                   ParseRowGroup, NULL           },
  219.   { TidyTag_TH,         "th",         VERS_ELEM_TH,         &W3CAttrsFor_TH[0],         (CM_ROW|CM_OPT|CM_NO_INDENT),                  ParseBlock,    NULL           },
  220.   { TidyTag_THEAD,      "thead",      VERS_ELEM_THEAD,      &W3CAttrsFor_THEAD[0],      (CM_TABLE|CM_ROWGRP|CM_OPT),                   ParseRowGroup, NULL           },
  221.   { TidyTag_TITLE,      "title",      VERS_ELEM_TITLE,      &W3CAttrsFor_TITLE[0],      (CM_HEAD),                                     ParseTitle,    NULL           },
  222.   { TidyTag_TR,         "tr",         VERS_ELEM_TR,         &W3CAttrsFor_TR[0],         (CM_TABLE|CM_OPT),                             ParseRow,      NULL           },
  223.   { TidyTag_TT,         "tt",         VERS_ELEM_TT,         &W3CAttrsFor_TT[0],         (CM_INLINE),                                   ParseInline,   NULL           },
  224.   { TidyTag_U,          "u",          VERS_ELEM_U,          &W3CAttrsFor_U[0],          (CM_INLINE),                                   ParseInline,   NULL           },
  225.   { TidyTag_UL,         "ul",         VERS_ELEM_UL,         &W3CAttrsFor_UL[0],         (CM_BLOCK),                                    ParseList,     NULL           },
  226.   { TidyTag_VAR,        "var",        VERS_ELEM_VAR,        &W3CAttrsFor_VAR[0],        (CM_INLINE),                                   ParseInline,   NULL           },
  227.   { TidyTag_XMP,        "xmp",        VERS_ELEM_XMP,        &W3CAttrsFor_XMP[0],        (CM_BLOCK|CM_OBSOLETE),                        ParsePre,      NULL           },
  228.   { TidyTag_NEXTID,     "nextid",     VERS_ELEM_NEXTID,     &W3CAttrsFor_NEXTID[0],     (CM_HEAD|CM_EMPTY),                            ParseEmpty,    NULL           },
  229.  
  230.   /* proprietary elements */
  231.   { TidyTag_ALIGN,      "align",      VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    ParseBlock,    NULL           },
  232.   { TidyTag_BGSOUND,    "bgsound",    VERS_MICROSOFT,       NULL,                       (CM_HEAD|CM_EMPTY),                            ParseEmpty,    NULL           },
  233.   { TidyTag_BLINK,      "blink",      VERS_PROPRIETARY,     NULL,                       (CM_INLINE),                                   ParseInline,   NULL           },
  234.   { TidyTag_COMMENT,    "comment",    VERS_MICROSOFT,       NULL,                       (CM_INLINE),                                   ParseInline,   NULL           },
  235.   { TidyTag_EMBED,      "embed",      VERS_NETSCAPE,        NULL,                       (CM_INLINE|CM_IMG|CM_EMPTY),                   ParseEmpty,    NULL           },
  236.   { TidyTag_ILAYER,     "ilayer",     VERS_NETSCAPE,        NULL,                       (CM_INLINE),                                   ParseInline,   NULL           },
  237.   { TidyTag_KEYGEN,     "keygen",     VERS_NETSCAPE,        NULL,                       (CM_INLINE|CM_EMPTY),                          ParseEmpty,    NULL           },
  238.   { TidyTag_LAYER,      "layer",      VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    ParseBlock,    NULL           },
  239.   { TidyTag_MARQUEE,    "marquee",    VERS_MICROSOFT,       NULL,                       (CM_INLINE|CM_OPT),                            ParseInline,   NULL           },
  240.   { TidyTag_MULTICOL,   "multicol",   VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    ParseBlock,    NULL           },
  241.   { TidyTag_NOBR,       "nobr",       VERS_PROPRIETARY,     NULL,                       (CM_INLINE),                                   ParseInline,   NULL           },
  242.   { TidyTag_NOEMBED,    "noembed",    VERS_NETSCAPE,        NULL,                       (CM_INLINE),                                   ParseInline,   NULL           },
  243.   { TidyTag_NOLAYER,    "nolayer",    VERS_NETSCAPE,        NULL,                       (CM_BLOCK|CM_INLINE|CM_MIXED),                 ParseBlock,    NULL           },
  244.   { TidyTag_NOSAVE,     "nosave",     VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    ParseBlock,    NULL           },
  245.   { TidyTag_SERVER,     "server",     VERS_NETSCAPE,        NULL,                       (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE),         ParseScript,   NULL           },
  246.   { TidyTag_SERVLET,    "servlet",    VERS_SUN,             NULL,                       (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM),         ParseBlock,    NULL           },
  247.   { TidyTag_SPACER,     "spacer",     VERS_NETSCAPE,        NULL,                       (CM_INLINE|CM_EMPTY),                          ParseEmpty,    NULL           },
  248.   { TidyTag_WBR,        "wbr",        VERS_PROPRIETARY,     NULL,                       (CM_INLINE|CM_EMPTY),                          ParseEmpty,    NULL           },
  249.  
  250.   /* this must be the final entry */
  251.   { (TidyTagId)0,        NULL,         0,                    NULL,                       (0),                                           NULL,          NULL           }
  252. };
  253.  
  254. #ifdef ELEMENT_HASH_LOOKUP
  255. static uint hash(ctmbstr s)
  256. {
  257.     uint hashval;
  258.  
  259.     for (hashval = 0; *s != '\0'; s++)
  260.         hashval = *s + 31*hashval;
  261.  
  262.     return hashval % ELEMENT_HASH_SIZE;
  263. }
  264.  
  265. static Dict *install(TidyTagImpl* tags, const Dict* old)
  266. {
  267.     Dict *np;
  268.     uint hashval;
  269.  
  270.     np = (Dict *)MemAlloc(sizeof(*np));
  271.     np->name = tmbstrdup(old->name);
  272.  
  273.     hashval = hash(np->name);
  274.     np->next = tags->hashtab[hashval];
  275.     tags->hashtab[hashval] = np;
  276.  
  277.     np->id       = old->id;
  278.     np->versions = old->versions;
  279.     np->model    = old->model;
  280.     np->parser   = old->parser;
  281.     np->chkattrs = old->chkattrs;
  282.     np->attrvers = old->attrvers;
  283.  
  284.     return np;
  285. }
  286. #endif /* ELEMENT_HASH_LOOKUP */
  287.  
  288. static const Dict* lookup( TidyTagImpl* tags, ctmbstr s )
  289. {
  290.     const Dict *np;
  291.  
  292.     if (!s)
  293.         return NULL;
  294.  
  295. #ifdef ELEMENT_HASH_LOOKUP
  296.     /* this breaks if declared elements get changed between two   */
  297.     /* parser runs since Tidy would use the cached version rather */
  298.     /* than the new one                                           */
  299.     for (np = tags->hashtab[hash(s)]; np != NULL; np = np->next)
  300.         if (tmbstrcmp(s, np->name) == 0)
  301.             return np;
  302.  
  303.     for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
  304.         if (tmbstrcmp(s, np->name) == 0)
  305.             return install(tags, np);
  306.  
  307.     for (np = tags->declared_tag_list; np; np = np->next)
  308.         if (tmbstrcmp(s, np->name) == 0)
  309.             return install(tags, np);
  310. #else
  311.  
  312.     for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
  313.         if (tmbstrcmp(s, np->name) == 0)
  314.             return np;
  315.  
  316.     for (np = tags->declared_tag_list; np; np = np->next)
  317.         if (tmbstrcmp(s, np->name) == 0)
  318.             return np;
  319.  
  320. #endif /* ELEMENT_HASH_LOOKUP */
  321.  
  322.     return NULL;
  323. }
  324.  
  325.  
  326. static void declare( TidyTagImpl* tags,
  327.                      ctmbstr name, uint versions, uint model, 
  328.                      Parser *parser, CheckAttribs *chkattrs )
  329. {
  330.     if ( name )
  331.     {
  332.         Dict* np = (Dict*) lookup( tags, name );
  333.         if ( np == NULL )
  334.         {
  335.             np = (Dict*) MemAlloc( sizeof(Dict) );
  336.             ClearMemory( np, sizeof(Dict) );
  337.  
  338.             np->name = tmbstrdup( name );
  339.             np->next = tags->declared_tag_list;
  340.             tags->declared_tag_list = np;
  341.         }
  342.  
  343.         /* Make sure we are not over-writing predefined tags */
  344.         if ( np->id == TidyTag_UNKNOWN )
  345.         {
  346.           np->versions = versions;
  347.           np->model   |= model;
  348.           np->parser   = parser;
  349.           np->chkattrs = chkattrs;
  350.           np->attrvers = NULL;
  351.         }
  352.     }
  353. }
  354.  
  355. /* public interface for finding tag by name */
  356. Bool FindTag( TidyDocImpl* doc, Node *node )
  357. {
  358.     const Dict *np = NULL;
  359.     if ( cfgBool(doc, TidyXmlTags) )
  360.     {
  361.         node->tag = doc->tags.xml_tags;
  362.         return yes;
  363.     }
  364.  
  365.     if ( node->element && (np = lookup(&doc->tags, node->element)) )
  366.     {
  367.         node->tag = np;
  368.         return yes;
  369.     }
  370.     
  371.     return no;
  372. }
  373.  
  374. const Dict* LookupTagDef( TidyTagId tid )
  375. {
  376.     const Dict *np;
  377.  
  378.     for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np )
  379.         if (np->id == tid)
  380.             return np;
  381.  
  382.     return NULL;    
  383. }
  384.  
  385. Parser* FindParser( TidyDocImpl* doc, Node *node )
  386. {
  387.     const Dict* np = lookup( &doc->tags, node->element );
  388.     if ( np )
  389.         return np->parser;
  390.     return NULL;
  391. }
  392.  
  393. void DefineTag( TidyDocImpl* doc, int tagType, ctmbstr name )
  394. {
  395.     Parser* parser = NULL;
  396.     uint cm = 0;
  397.     uint vers = VERS_PROPRIETARY;
  398.  
  399.     switch (tagType)
  400.     {
  401.     case tagtype_empty:
  402.         cm = CM_EMPTY|CM_NO_INDENT|CM_NEW;
  403.         parser = ParseBlock;
  404.         break;
  405.  
  406.     case tagtype_inline:
  407.         cm = CM_INLINE|CM_NO_INDENT|CM_NEW;
  408.         parser = ParseInline;
  409.         break;
  410.  
  411.     case tagtype_block:
  412.         cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
  413.         parser = ParseBlock;
  414.         break;
  415.  
  416.     case tagtype_pre:
  417.         cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
  418.         parser = ParsePre;
  419.         break;
  420.     }
  421.     if ( cm && parser )
  422.         declare( &doc->tags, name, vers, cm, parser, NULL );
  423. }
  424.  
  425. TidyIterator   GetDeclaredTagList( TidyDocImpl* doc )
  426. {
  427.     return (TidyIterator) doc->tags.declared_tag_list;
  428. }
  429.  
  430. ctmbstr        GetNextDeclaredTag( TidyDocImpl* doc, int tagType,
  431.                                    TidyIterator* iter )
  432. {
  433. #pragma unused(doc)
  434.  
  435.     ctmbstr name = NULL;
  436.     Dict* curr;
  437.     for ( curr = (Dict*) *iter; name == NULL && curr != NULL; curr = curr->next )
  438.     {
  439.         switch ( tagType )
  440.         {
  441.         case tagtype_empty:
  442.             if ( curr->model & CM_EMPTY )
  443.                 name = curr->name;
  444.             break;
  445.  
  446.         case tagtype_inline:
  447.             if ( curr->model & CM_INLINE )
  448.                 name = curr->name;
  449.             break;
  450.  
  451.         case tagtype_block:
  452.             if ( (curr->model & CM_BLOCK) &&
  453.                  curr->parser == ParseBlock )
  454.                 name = curr->name;
  455.             break;
  456.     
  457.         case tagtype_pre:
  458.             if ( (curr->model & CM_BLOCK) &&
  459.                  curr->parser == ParsePre )
  460.                 name = curr->name;
  461.             break;
  462.         }
  463.     }
  464.     *iter = (TidyIterator) curr;
  465.     return name;
  466. }
  467.  
  468. void InitTags( TidyDocImpl* doc )
  469. {
  470.     Dict* xml;
  471.     TidyTagImpl* tags = &doc->tags;
  472.  
  473.     ClearMemory( tags, sizeof(TidyTagImpl) );
  474.  
  475.     /* create dummy entry for all xml tags */
  476.     xml = (Dict*) MemAlloc( sizeof(Dict) );
  477.     ClearMemory( xml, sizeof(Dict) );
  478.     xml->name = NULL;
  479.     xml->versions = VERS_XML;
  480.     xml->model = CM_BLOCK;
  481.     xml->parser = NULL;
  482.     xml->chkattrs = NULL;
  483.     xml->attrvers = NULL;
  484.     tags->xml_tags = xml;
  485. }
  486.  
  487. /* By default, zap all of them.  But allow
  488. ** an single type to be specified.
  489. */
  490. void FreeDeclaredTags( TidyDocImpl* doc, int tagType )
  491. {
  492.     TidyTagImpl* tags = &doc->tags;
  493.     Dict *curr, *next = NULL, *prev = NULL;
  494.  
  495.     for ( curr=tags->declared_tag_list; curr; curr = next )
  496.     {
  497.         Bool deleteIt = yes;
  498.         next = curr->next;
  499.         switch ( tagType )
  500.         {
  501.         case tagtype_empty:
  502.             deleteIt = ( curr->model & CM_EMPTY );
  503.             break;
  504.  
  505.         case tagtype_inline:
  506.             deleteIt = ( curr->model & CM_INLINE );
  507.             break;
  508.  
  509.         case tagtype_block:
  510.             deleteIt = ( (curr->model & CM_BLOCK) &&
  511.                          curr->parser == ParseBlock );
  512.             break;
  513.     
  514.         case tagtype_pre:
  515.             deleteIt = ( (curr->model & CM_BLOCK) &&
  516.                          curr->parser == ParsePre );
  517.             break;
  518.         }
  519.  
  520.         if ( deleteIt )
  521.         {
  522.           MemFree( curr->name );
  523.           MemFree( curr );
  524.           if ( prev )
  525.             prev->next = next;
  526.           else
  527.             tags->declared_tag_list = next;
  528.         }
  529.         else
  530.           prev = curr;
  531.     }
  532. }
  533.  
  534. void FreeTags( TidyDocImpl* doc )
  535. {
  536.     TidyTagImpl* tags = &doc->tags;
  537.  
  538. #ifdef ELEMENT_HASH_LOOKUP
  539.     uint i;
  540.     Dict *prev, *next;
  541.  
  542.     for (i = 0; i < ELEMENT_HASH_SIZE; ++i)
  543.     {
  544.         prev = NULL;
  545.         next = tags->hashtab[i];
  546.  
  547.         while(next)
  548.         {
  549.             prev = next->next;
  550.             MemFree(next->name);
  551.             MemFree(next);
  552.             next = prev;
  553.         }
  554.  
  555.         tags->hashtab[i] = NULL;
  556.     }
  557. #endif
  558.  
  559.     FreeDeclaredTags( doc, 0 );
  560.     MemFree( tags->xml_tags );
  561.  
  562.     /* get rid of dangling tag references */
  563.     ClearMemory( tags, sizeof(TidyTagImpl) );
  564. }
  565.  
  566.  
  567. /* default method for checking an element's attributes */
  568. void CheckAttributes( TidyDocImpl* doc, Node *node )
  569. {
  570.     AttVal *next, *attval = node->attributes;
  571.     while (attval)
  572.     {
  573.         next = attval->next;
  574.         CheckAttribute( doc, node, attval );
  575.         attval = next;
  576.     }
  577. }
  578.  
  579. /* methods for checking attributes for specific elements */
  580.  
  581. void CheckIMG( TidyDocImpl* doc, Node *node )
  582. {
  583.     Bool HasAlt = AttrGetById(node, TidyAttr_ALT) != NULL;
  584.     Bool HasSrc = AttrGetById(node, TidyAttr_SRC) != NULL;
  585.     Bool HasUseMap = AttrGetById(node, TidyAttr_USEMAP) != NULL;
  586.     Bool HasIsMap = AttrGetById(node, TidyAttr_ISMAP) != NULL;
  587.     Bool HasDataFld = AttrGetById(node, TidyAttr_DATAFLD) != NULL;
  588.  
  589.     CheckAttributes(doc, node);
  590.  
  591.     if ( !HasAlt )
  592.     {
  593.         if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
  594.         {
  595.             doc->badAccess |= MISSING_IMAGE_ALT;
  596.             ReportMissingAttr( doc, node, "alt" );
  597.         }
  598.   
  599.         if ( cfgStr(doc, TidyAltText) )
  600.             AddAttribute( doc, node, "alt", cfgStr(doc, TidyAltText) );
  601.     }
  602.  
  603.     if ( !HasSrc && !HasDataFld )
  604.         ReportMissingAttr( doc, node, "src" );
  605.  
  606.     if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
  607.     {
  608.         if ( HasIsMap && !HasUseMap )
  609.             ReportMissingAttr( doc, node, "ismap" );
  610.     }
  611. }
  612.  
  613. void CheckCaption(TidyDocImpl* doc, Node *node)
  614. {
  615.     AttVal *attval;
  616.  
  617.     CheckAttributes(doc, node);
  618.  
  619.     attval = AttrGetById(node, TidyAttr_ALIGN);
  620.  
  621.     if (!AttrHasValue(attval))
  622.         return;
  623.  
  624.     if (AttrValueIs(attval, "left") || AttrValueIs(attval, "right"))
  625.         ConstrainVersion(doc, VERS_HTML40_LOOSE);
  626.     else if (AttrValueIs(attval, "top") || AttrValueIs(attval, "bottom"))
  627.         ConstrainVersion(doc, ~(VERS_HTML20|VERS_HTML32));
  628.     else
  629.         ReportAttrError(doc, node, attval, BAD_ATTRIBUTE_VALUE);
  630. }
  631.  
  632. void CheckHTML( TidyDocImpl* doc, Node *node )
  633. {
  634.     AttVal *xmlns;
  635.  
  636.     xmlns = AttrGetById(node, TidyAttr_XMLNS);
  637.  
  638.     if (AttrMatches(xmlns, XHTML_NAMESPACE))
  639.     {
  640.         Bool htmlOut = cfgBool( doc, TidyHtmlOut );
  641.         doc->lexer->isvoyager = yes;                  /* Unless plain HTML */
  642.         SetOptionBool( doc, TidyXhtmlOut, !htmlOut ); /* is specified, output*/
  643.         SetOptionBool( doc, TidyXmlOut, !htmlOut );   /* will be XHTML. */
  644.  
  645.         /* adjust other config options, just as in config.c */
  646.         if ( !htmlOut )
  647.         {
  648.             SetOptionBool( doc, TidyUpperCaseTags, no );
  649.             SetOptionBool( doc, TidyUpperCaseAttrs, no );
  650.         }
  651.     }
  652.  
  653.     CheckAttributes(doc, node);
  654.  
  655. }
  656.  
  657. void CheckAREA( TidyDocImpl* doc, Node *node )
  658. {
  659.     Bool HasAlt = AttrGetById(node, TidyAttr_ALT) != NULL;
  660.     Bool HasHref = AttrGetById(node, TidyAttr_HREF) != NULL;
  661.  
  662.     CheckAttributes(doc, node);
  663.  
  664.     if ( !HasAlt )
  665.     {
  666.         if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
  667.         {
  668.             doc->badAccess |= MISSING_LINK_ALT;
  669.             ReportMissingAttr( doc, node, "alt" );
  670.         }
  671.     }
  672.  
  673.     if ( !HasHref )
  674.         ReportMissingAttr( doc, node, "href" );
  675. }
  676.  
  677. void CheckTABLE( TidyDocImpl* doc, Node *node )
  678. {
  679.     AttVal* attval;
  680.     Bool HasSummary = AttrGetById(node, TidyAttr_SUMMARY) != NULL;
  681.  
  682.     CheckAttributes(doc, node);
  683.  
  684.     /* a missing summary attribute is bad accessibility, no matter
  685.        what HTML version is involved; a document wihtout is valid */
  686.     if (cfg(doc, TidyAccessibilityCheckLevel) == 0)
  687.     {
  688.         if (!HasSummary)
  689.         {
  690.             doc->badAccess |= MISSING_SUMMARY;
  691.             ReportMissingAttr( doc, node, "summary");
  692.         }
  693.     }
  694.  
  695.     /* convert <table border> to <table border="1"> */
  696.     if ( cfgBool(doc, TidyXmlOut) && (attval = AttrGetById(node, TidyAttr_BORDER)) )
  697.     {
  698.         if (attval->value == NULL)
  699.             attval->value = tmbstrdup("1");
  700.     }
  701. }
  702.  
  703. /* add missing type attribute when appropriate */
  704. void CheckSCRIPT( TidyDocImpl* doc, Node *node )
  705. {
  706.     AttVal *lang, *type;
  707.     char buf[16];
  708.  
  709.     CheckAttributes( doc, node );
  710.  
  711.     lang = AttrGetById(node, TidyAttr_LANGUAGE);
  712.     type = AttrGetById(node, TidyAttr_TYPE);
  713.  
  714.     if ( !type )
  715.     {
  716.         /*  ReportMissingAttr( doc, node, "type" );  */
  717.  
  718.         /* check for javascript */
  719.         if ( lang )
  720.         {
  721.             tmbstrncpy( buf, lang->value, sizeof(buf) );
  722.             buf[10] = '\0';
  723.  
  724.             if ( tmbstrncasecmp(buf, "javascript", 10) == 0 ||
  725.                  tmbstrncasecmp(buf,    "jscript", 7) == 0 )
  726.             {
  727.                 AddAttribute( doc, node, "type", "text/javascript" );
  728.             }
  729.             else if ( tmbstrcasecmp(buf, "vbscript") == 0 )
  730.             {
  731.                 /* per Randy Waki 8/6/01 */
  732.                 AddAttribute( doc, node, "type", "text/vbscript" );
  733.             }
  734.         }
  735.         else
  736.             AddAttribute( doc, node, "type", "text/javascript" );
  737.         type = AttrGetById(node, TidyAttr_TYPE);
  738.         ReportAttrError( doc, node, type, INSERTING_ATTRIBUTE );
  739.     }
  740. }
  741.  
  742.  
  743. /* add missing type attribute when appropriate */
  744. void CheckSTYLE( TidyDocImpl* doc, Node *node )
  745. {
  746.     AttVal *type = AttrGetById(node, TidyAttr_TYPE);
  747.  
  748.     CheckAttributes( doc, node );
  749.  
  750.     if ( !type || !type->value || !tmbstrlen(type->value) )
  751.     {
  752.         type = RepairAttrValue(doc, node, "type", "text/css");
  753.         ReportAttrError( doc, node, type, INSERTING_ATTRIBUTE );
  754.     }
  755. }
  756.  
  757. /* add missing type attribute when appropriate */
  758. void CheckLINK( TidyDocImpl* doc, Node *node )
  759. {
  760.     AttVal *rel = AttrGetById(node, TidyAttr_REL);
  761.  
  762.     CheckAttributes( doc, node );
  763.  
  764.     /* todo: <link rel="alternate stylesheet"> */
  765.     if (AttrValueIs(rel, "stylesheet"))
  766.     {
  767.         AttVal *type = AttrGetById(node, TidyAttr_TYPE);
  768.         if (!type)
  769.         {
  770.             AddAttribute( doc, node, "type", "text/css" );
  771.             type = AttrGetById(node, TidyAttr_TYPE);
  772.             ReportAttrError( doc, node, type, INSERTING_ATTRIBUTE );
  773.         }
  774.     }
  775. }
  776.  
  777. /* reports missing action attribute */
  778. void CheckFORM( TidyDocImpl* doc, Node *node )
  779. {
  780.     AttVal *action = AttrGetById(node, TidyAttr_ACTION);
  781.  
  782.     CheckAttributes(doc, node);
  783.  
  784.     if (!action)
  785.         ReportMissingAttr(doc, node, "action");
  786. }
  787.  
  788. /* reports missing content attribute */
  789. void CheckMETA( TidyDocImpl* doc, Node *node )
  790. {
  791.     AttVal *content = AttrGetById(node, TidyAttr_CONTENT);
  792.  
  793.     CheckAttributes(doc, node);
  794.  
  795.     if (!content)
  796.         ReportMissingAttr( doc, node, "content" );
  797.     /* name or http-equiv attribute must also be set */
  798. }
  799.  
  800.  
  801. Bool nodeIsText( Node* node )
  802. {
  803.   return ( node && node->type == TextNode );
  804. }
  805.  
  806. Bool nodeHasText( TidyDocImpl* doc, Node* node )
  807. {
  808.   if ( doc && node )
  809.   {
  810.     uint ix;
  811.     Lexer* lexer = doc->lexer;
  812.     for ( ix = node->start; ix < node->end; ++ix )
  813.     {
  814.         /* whitespace */
  815.         if ( !IsWhite( lexer->lexbuf[ix] ) )
  816.             return yes;
  817.     }
  818.   }
  819.   return no;
  820. }
  821.  
  822. Bool nodeIsElement( Node* node )
  823. {
  824.   return ( node && 
  825.            (node->type == StartTag || node->type == StartEndTag) );
  826. }
  827.  
  828. /* Compare & result to operand.  If equal, then all bits
  829. ** requested are set.
  830. */
  831. Bool nodeMatchCM( Node* node, uint contentModel )
  832. {
  833.   return ( node && node->tag && 
  834.            (node->tag->model & contentModel) == contentModel );
  835. }
  836.  
  837. /* True if any of the bits requested are set.
  838. */
  839. Bool nodeHasCM( Node* node, uint contentModel )
  840. {
  841.   return ( node && node->tag && 
  842.            (node->tag->model & contentModel) != 0 );
  843. }
  844.  
  845. Bool nodeCMIsBlock( Node* node )
  846. {
  847.   return nodeHasCM( node, CM_BLOCK );
  848. }
  849. Bool nodeCMIsInline( Node* node )
  850. {
  851.   return nodeHasCM( node, CM_INLINE );
  852. }
  853. Bool nodeCMIsEmpty( Node* node )
  854. {
  855.   return nodeHasCM( node, CM_EMPTY );
  856. }
  857.  
  858. Bool nodeIsHeader( Node* node )
  859. {
  860.     TidyTagId tid = TagId( node  );
  861.     return ( tid && 
  862.              tid == TidyTag_H1 ||
  863.              tid == TidyTag_H2 ||
  864.              tid == TidyTag_H3 ||        
  865.              tid == TidyTag_H4 ||        
  866.              tid == TidyTag_H5 ||
  867.              tid == TidyTag_H6 );
  868. }
  869.  
  870. uint nodeHeaderLevel( Node* node )
  871. {
  872.     TidyTagId tid = TagId( node  );
  873.     switch ( tid )
  874.     {
  875.     case TidyTag_H1:
  876.         return 1;
  877.     case TidyTag_H2:
  878.         return 2;
  879.     case TidyTag_H3:
  880.         return 3;
  881.     case TidyTag_H4:
  882.         return 4;
  883.     case TidyTag_H5:
  884.         return 5;
  885.     case TidyTag_H6:
  886.         return 6;
  887.     default:
  888.     {
  889.         /* fall through */
  890.     }
  891.     }
  892.     return 0;
  893. }
  894.